{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Congestion Charging - Easy\n",
    "\n",
    "![rel](https://sqlzoo.net/w/images/f/f6/CongestionCharge.png)\n",
    "\n",
    "camera(**id**, perim)\n",
    "\n",
    "keeper(**id**, name, address)\n",
    "\n",
    "vehicle(**id**, keeper)\n",
    "\n",
    "image(**_camera_**, **whn**, reg)\n",
    "\n",
    "permit(**_reg_**, **sDate**, chargeType)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Setting default log level to \"WARN\".\n",
      "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n"
     ]
    }
   ],
   "source": [
    "import findspark\n",
    "import pandas as pd\n",
    "findspark.init()\n",
    "\n",
    "SVR = '192.168.31.31'\n",
    "from pyspark.sql import SparkSession\n",
    "from pyspark.sql.functions import *\n",
    "from pyspark.sql import Window\n",
    "\n",
    "sc = (SparkSession.builder.appName('app18-1') \n",
    "      .master(f'spark://{SVR}:7077') \n",
    "      .config('spark.sql.warehouse.dir', f'hdfs://{SVR}:9000/user/hive/warehouse') \n",
    "      .config('spark.cores.max', '4') \n",
    "      .config('spark.executor.instances', '1') \n",
    "      .config('spark.executor.cores', '2') \n",
    "      .config('spark.executor.memory', '10g') \n",
    "      .enableHiveSupport().getOrCreate())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "camera = sc.read.table('sqlzoo.camera')\n",
    "keeper = sc.read.table('sqlzoo.keeper')\n",
    "vehicle = sc.read.table('sqlzoo.vehicle')\n",
    "image = sc.read.table('sqlzoo.image')\n",
    "permit = sc.read.table('sqlzoo.permit')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Sample query\n",
    "\n",
    "List the vehicles for which 'Strenuous, Sam' is the registered keeper. The link between Keepers and Vehicles is via the foreign key specified in the CREATE TABLE vehicle statement. Note the line:\n",
    "\n",
    "```\n",
    " ,FOREIGN KEY(keeper) REFERENCES keeper(id)\n",
    "```\n",
    "\n",
    "This will be the basis of our join condition."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.\n",
    "Show the name and address of the keeper of vehicle SO 02 PSP."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "                                                                                \r"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>address</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Strenuous, Sam</td>\n",
       "      <td>Surjection Street</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             name            address\n",
       "0  Strenuous, Sam  Surjection Street"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(keeper.join(vehicle.filter(vehicle['id']=='SO 02 PSP'),\n",
    "             on=(keeper['id']==vehicle['keeper']))\n",
    " .select('name', 'address')\n",
    " .toPandas())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.\n",
    "Show the number of cameras that take images for incoming vehicles."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>cnt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   cnt\n",
       "0    8"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(camera.filter(col('perim')=='IN')\n",
    " .groupBy()\n",
    " .agg(count('id').alias('cnt'))\n",
    " .toPandas())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.\n",
    "List the image details taken by Camera 10 before 26 Feb 2007."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>camera</th>\n",
       "      <th>whn</th>\n",
       "      <th>reg</th>\n",
       "      <th>id</th>\n",
       "      <th>perim</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>2007-02-25 06:10:13.0</td>\n",
       "      <td>SO 02 ASP</td>\n",
       "      <td>1</td>\n",
       "      <td>IN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>17</td>\n",
       "      <td>2007-02-25 06:20:01.0</td>\n",
       "      <td>SO 02 ASP</td>\n",
       "      <td>17</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>18</td>\n",
       "      <td>2007-02-25 06:23:40.0</td>\n",
       "      <td>SO 02 ASP</td>\n",
       "      <td>18</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>9</td>\n",
       "      <td>2007-02-25 06:26:04.0</td>\n",
       "      <td>SO 02 ASP</td>\n",
       "      <td>9</td>\n",
       "      <td>OUT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>17</td>\n",
       "      <td>2007-02-25 06:57:31.0</td>\n",
       "      <td>SO 02 CSP</td>\n",
       "      <td>17</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>17</td>\n",
       "      <td>2007-02-25 07:00:40.0</td>\n",
       "      <td>SO 02 CSP</td>\n",
       "      <td>17</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>12</td>\n",
       "      <td>2007-02-25 07:04:31.0</td>\n",
       "      <td>SO 02 CSP</td>\n",
       "      <td>12</td>\n",
       "      <td>OUT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>5</td>\n",
       "      <td>2007-02-25 07:10:00.0</td>\n",
       "      <td>SO 02 GSP</td>\n",
       "      <td>5</td>\n",
       "      <td>IN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>16</td>\n",
       "      <td>2007-02-25 07:13:00.0</td>\n",
       "      <td>SO 02 GSP</td>\n",
       "      <td>16</td>\n",
       "      <td>OUT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>2</td>\n",
       "      <td>2007-02-25 07:20:01.0</td>\n",
       "      <td>SO 02 TSP</td>\n",
       "      <td>2</td>\n",
       "      <td>IN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>19</td>\n",
       "      <td>2007-02-25 07:23:00.0</td>\n",
       "      <td>SO 02 TSP</td>\n",
       "      <td>19</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>19</td>\n",
       "      <td>2007-02-25 07:26:31.0</td>\n",
       "      <td>SO 02 TSP</td>\n",
       "      <td>19</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>19</td>\n",
       "      <td>2007-02-25 07:29:00.0</td>\n",
       "      <td>SO 02 TSP</td>\n",
       "      <td>19</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>8</td>\n",
       "      <td>2007-02-25 07:35:41.0</td>\n",
       "      <td>SO 02 CSP</td>\n",
       "      <td>8</td>\n",
       "      <td>IN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>18</td>\n",
       "      <td>2007-02-25 07:39:04.0</td>\n",
       "      <td>SO 02 CSP</td>\n",
       "      <td>18</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>18</td>\n",
       "      <td>2007-02-25 07:42:30.0</td>\n",
       "      <td>SO 02 CSP</td>\n",
       "      <td>18</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>10</td>\n",
       "      <td>2007-02-25 07:45:11.0</td>\n",
       "      <td>SO 02 CSP</td>\n",
       "      <td>10</td>\n",
       "      <td>OUT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>8</td>\n",
       "      <td>2007-02-25 07:48:10.0</td>\n",
       "      <td>SO 02 CSP</td>\n",
       "      <td>8</td>\n",
       "      <td>IN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>19</td>\n",
       "      <td>2007-02-25 07:51:10.0</td>\n",
       "      <td>SO 02 CSP</td>\n",
       "      <td>19</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>18</td>\n",
       "      <td>2007-02-25 07:55:11.0</td>\n",
       "      <td>SO 02 CSP</td>\n",
       "      <td>18</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>11</td>\n",
       "      <td>2007-02-25 07:58:01.0</td>\n",
       "      <td>SO 02 CSP</td>\n",
       "      <td>11</td>\n",
       "      <td>OUT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>18</td>\n",
       "      <td>2007-02-25 16:28:40.0</td>\n",
       "      <td>SO 02 SSP</td>\n",
       "      <td>18</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>18</td>\n",
       "      <td>2007-02-25 16:29:11.0</td>\n",
       "      <td>SO 02 DSP</td>\n",
       "      <td>18</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>9</td>\n",
       "      <td>2007-02-25 16:31:01.0</td>\n",
       "      <td>SO 02 SSP</td>\n",
       "      <td>9</td>\n",
       "      <td>OUT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>19</td>\n",
       "      <td>2007-02-25 16:31:01.0</td>\n",
       "      <td>SO 02 DSP</td>\n",
       "      <td>19</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>18</td>\n",
       "      <td>2007-02-25 16:38:31.0</td>\n",
       "      <td>SO 02 RSP</td>\n",
       "      <td>18</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>9</td>\n",
       "      <td>2007-02-25 16:39:10.0</td>\n",
       "      <td>SO 02 RSP</td>\n",
       "      <td>9</td>\n",
       "      <td>OUT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>9</td>\n",
       "      <td>2007-02-25 16:45:04.0</td>\n",
       "      <td>SO 02 HSP</td>\n",
       "      <td>9</td>\n",
       "      <td>OUT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>9</td>\n",
       "      <td>2007-02-25 16:48:11.0</td>\n",
       "      <td>SO 02 HSP</td>\n",
       "      <td>9</td>\n",
       "      <td>OUT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>9</td>\n",
       "      <td>2007-02-25 16:51:30.0</td>\n",
       "      <td>SO 02 HSP</td>\n",
       "      <td>9</td>\n",
       "      <td>OUT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>9</td>\n",
       "      <td>2007-02-25 16:58:01.0</td>\n",
       "      <td>SO 02 ISP</td>\n",
       "      <td>9</td>\n",
       "      <td>OUT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>12</td>\n",
       "      <td>2007-02-25 17:01:13.0</td>\n",
       "      <td>SO 02 ISP</td>\n",
       "      <td>12</td>\n",
       "      <td>OUT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>3</td>\n",
       "      <td>2007-02-25 17:07:00.0</td>\n",
       "      <td>SO 02 JSP</td>\n",
       "      <td>3</td>\n",
       "      <td>IN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>18</td>\n",
       "      <td>2007-02-25 17:10:43.0</td>\n",
       "      <td>SO 02 JSP</td>\n",
       "      <td>18</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>19</td>\n",
       "      <td>2007-02-25 17:14:11.0</td>\n",
       "      <td>SO 02 JSP</td>\n",
       "      <td>19</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>3</td>\n",
       "      <td>2007-02-25 17:16:11.0</td>\n",
       "      <td>SO 02 ESP</td>\n",
       "      <td>3</td>\n",
       "      <td>IN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>3</td>\n",
       "      <td>2007-02-25 17:17:03.0</td>\n",
       "      <td>SO 02 JSP</td>\n",
       "      <td>3</td>\n",
       "      <td>IN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>19</td>\n",
       "      <td>2007-02-25 17:42:41.0</td>\n",
       "      <td>SO 02 DSP</td>\n",
       "      <td>19</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>11</td>\n",
       "      <td>2007-02-25 18:08:00.0</td>\n",
       "      <td>SO 02 FSP</td>\n",
       "      <td>11</td>\n",
       "      <td>OUT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>12</td>\n",
       "      <td>2007-02-25 18:08:13.0</td>\n",
       "      <td>SO 02 GSP</td>\n",
       "      <td>12</td>\n",
       "      <td>OUT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>10</td>\n",
       "      <td>2007-02-25 18:08:40.0</td>\n",
       "      <td>SO 02 ESP</td>\n",
       "      <td>10</td>\n",
       "      <td>OUT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>10</td>\n",
       "      <td>2007-02-25 18:23:11.0</td>\n",
       "      <td>SO 02 MUP</td>\n",
       "      <td>10</td>\n",
       "      <td>OUT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>11</td>\n",
       "      <td>2007-02-25 18:26:13.0</td>\n",
       "      <td>SO 02 NUP</td>\n",
       "      <td>11</td>\n",
       "      <td>OUT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>12</td>\n",
       "      <td>2007-02-25 18:29:01.0</td>\n",
       "      <td>SO 02 OUP</td>\n",
       "      <td>12</td>\n",
       "      <td>OUT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>3</td>\n",
       "      <td>2007-02-25 18:33:10.0</td>\n",
       "      <td>SO 02 PUP</td>\n",
       "      <td>3</td>\n",
       "      <td>IN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>15</td>\n",
       "      <td>2007-02-25 18:36:31.0</td>\n",
       "      <td>SO 02 PUP</td>\n",
       "      <td>15</td>\n",
       "      <td>OUT</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>3</td>\n",
       "      <td>2007-02-25 18:39:10.0</td>\n",
       "      <td>SO 02 PUP</td>\n",
       "      <td>3</td>\n",
       "      <td>IN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>9</td>\n",
       "      <td>2007-02-25 18:54:30.0</td>\n",
       "      <td>SO 02 DSP</td>\n",
       "      <td>9</td>\n",
       "      <td>OUT</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    camera                    whn        reg  id perim\n",
       "0        1  2007-02-25 06:10:13.0  SO 02 ASP   1    IN\n",
       "1       17  2007-02-25 06:20:01.0  SO 02 ASP  17  None\n",
       "2       18  2007-02-25 06:23:40.0  SO 02 ASP  18  None\n",
       "3        9  2007-02-25 06:26:04.0  SO 02 ASP   9   OUT\n",
       "4       17  2007-02-25 06:57:31.0  SO 02 CSP  17  None\n",
       "5       17  2007-02-25 07:00:40.0  SO 02 CSP  17  None\n",
       "6       12  2007-02-25 07:04:31.0  SO 02 CSP  12   OUT\n",
       "7        5  2007-02-25 07:10:00.0  SO 02 GSP   5    IN\n",
       "8       16  2007-02-25 07:13:00.0  SO 02 GSP  16   OUT\n",
       "9        2  2007-02-25 07:20:01.0  SO 02 TSP   2    IN\n",
       "10      19  2007-02-25 07:23:00.0  SO 02 TSP  19  None\n",
       "11      19  2007-02-25 07:26:31.0  SO 02 TSP  19  None\n",
       "12      19  2007-02-25 07:29:00.0  SO 02 TSP  19  None\n",
       "13       8  2007-02-25 07:35:41.0  SO 02 CSP   8    IN\n",
       "14      18  2007-02-25 07:39:04.0  SO 02 CSP  18  None\n",
       "15      18  2007-02-25 07:42:30.0  SO 02 CSP  18  None\n",
       "16      10  2007-02-25 07:45:11.0  SO 02 CSP  10   OUT\n",
       "17       8  2007-02-25 07:48:10.0  SO 02 CSP   8    IN\n",
       "18      19  2007-02-25 07:51:10.0  SO 02 CSP  19  None\n",
       "19      18  2007-02-25 07:55:11.0  SO 02 CSP  18  None\n",
       "20      11  2007-02-25 07:58:01.0  SO 02 CSP  11   OUT\n",
       "21      18  2007-02-25 16:28:40.0  SO 02 SSP  18  None\n",
       "22      18  2007-02-25 16:29:11.0  SO 02 DSP  18  None\n",
       "23       9  2007-02-25 16:31:01.0  SO 02 SSP   9   OUT\n",
       "24      19  2007-02-25 16:31:01.0  SO 02 DSP  19  None\n",
       "25      18  2007-02-25 16:38:31.0  SO 02 RSP  18  None\n",
       "26       9  2007-02-25 16:39:10.0  SO 02 RSP   9   OUT\n",
       "27       9  2007-02-25 16:45:04.0  SO 02 HSP   9   OUT\n",
       "28       9  2007-02-25 16:48:11.0  SO 02 HSP   9   OUT\n",
       "29       9  2007-02-25 16:51:30.0  SO 02 HSP   9   OUT\n",
       "30       9  2007-02-25 16:58:01.0  SO 02 ISP   9   OUT\n",
       "31      12  2007-02-25 17:01:13.0  SO 02 ISP  12   OUT\n",
       "32       3  2007-02-25 17:07:00.0  SO 02 JSP   3    IN\n",
       "33      18  2007-02-25 17:10:43.0  SO 02 JSP  18  None\n",
       "34      19  2007-02-25 17:14:11.0  SO 02 JSP  19  None\n",
       "35       3  2007-02-25 17:16:11.0  SO 02 ESP   3    IN\n",
       "36       3  2007-02-25 17:17:03.0  SO 02 JSP   3    IN\n",
       "37      19  2007-02-25 17:42:41.0  SO 02 DSP  19  None\n",
       "38      11  2007-02-25 18:08:00.0  SO 02 FSP  11   OUT\n",
       "39      12  2007-02-25 18:08:13.0  SO 02 GSP  12   OUT\n",
       "40      10  2007-02-25 18:08:40.0  SO 02 ESP  10   OUT\n",
       "41      10  2007-02-25 18:23:11.0  SO 02 MUP  10   OUT\n",
       "42      11  2007-02-25 18:26:13.0  SO 02 NUP  11   OUT\n",
       "43      12  2007-02-25 18:29:01.0  SO 02 OUP  12   OUT\n",
       "44       3  2007-02-25 18:33:10.0  SO 02 PUP   3    IN\n",
       "45      15  2007-02-25 18:36:31.0  SO 02 PUP  15   OUT\n",
       "46       3  2007-02-25 18:39:10.0  SO 02 PUP   3    IN\n",
       "47       9  2007-02-25 18:54:30.0  SO 02 DSP   9   OUT"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(image.filter(image['whn'] < '2007-02-26')\n",
    " .join(camera, on=(image['camera']==camera['id']))\n",
    " .orderBy('whn', 'camera')\n",
    " .toPandas())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4.\n",
    "List the number of images taken by each camera. Your answer should show how many images have been taken by camera 1, camera 2 etc. The list must NOT include the images taken by camera 15, 16, 17, 18 and 19."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>camera</th>\n",
       "      <th>cnt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>12</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>9</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>8</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>10</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>11</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   camera  cnt\n",
       "0      12    4\n",
       "1       1    1\n",
       "2       3    5\n",
       "3       5    1\n",
       "4       9    8\n",
       "5       8    2\n",
       "6      10    4\n",
       "7      11    3\n",
       "8       2    1"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(image.filter(~ image['camera'].between(15, 19))\n",
    " .groupBy('camera')\n",
    " .agg(count('reg').alias('cnt'))\n",
    " .toPandas())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5.\n",
    "A number of vehicles have permits that start on 30th Jan 2007. List the name and address for each keeper in alphabetical order without duplication."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>address</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Ambiguous, Arthur</td>\n",
       "      <td>Absorption Ave.</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Assiduous, Annie</td>\n",
       "      <td>Attribution Alley</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Contiguous, Carol</td>\n",
       "      <td>Circumscription Close</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Strenuous, Sam</td>\n",
       "      <td>Surjection Street</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                name                address\n",
       "0  Ambiguous, Arthur        Absorption Ave.\n",
       "1   Assiduous, Annie      Attribution Alley\n",
       "2  Contiguous, Carol  Circumscription Close\n",
       "3     Strenuous, Sam      Surjection Street"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "(keeper.withColumnRenamed('id', 'keeper')\n",
    " .join(vehicle, on='keeper')\n",
    " .join(permit.filter(to_date(permit['sdate'])=='2007-01-30'),\n",
    "       on=(vehicle['id']==permit['reg']))\n",
    " .select('name', 'address')\n",
    " .distinct()\n",
    " .orderBy('name')\n",
    " .toPandas())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "sc.stop()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
